In [2]:
import sqlalchemy
import pandas as pd
import re

Read the mapping from SQL


In [3]:
e = sqlalchemy.create_engine('mysql://root:root@127.0.0.1:3306/UMLS_ext_mappings')

In [66]:
df0 = pd.read_sql("select CTV3_CONCEPTID, V2_CONCEPTID from RCD_V3_to_V2", e)\
        .drop_duplicates()\
        .sort_index(by='CTV3_CONCEPTID')
df0.describe()


Out[66]:
CTV3_CONCEPTID V2_CONCEPTID
count 292648 292648
unique 292542 80461
top XM1IG _DRUG
freq 2 56046

Number of entries in the mapping without Read2 code


In [67]:
df0[df0.V2_CONCEPTID.str.contains('_') == True].V2_CONCEPTID.value_counts()


Out[67]:
_DRUG    56046
_NONE    35462
dtype: int64

In [68]:
df = df0[df0.V2_CONCEPTID.str.contains('_') == False]

Number of occurrences of READ3 codes in mapping


In [70]:
occurrences = df.CTV3_CONCEPTID.value_counts().value_counts()
DataFrame(data={'codes': occurrences.values, 'occurrences': occurrences.index}, index=['',''])


Out[70]:
codes occurrences
200928 1
106 2

Test


In [71]:
# Fever
code3s = ["R0062", "X76Df", "X76Di", "X76EI", "XM0yv"]
df[df.CTV3_CONCEPTID.isin(code3s)]


Out[71]:
CTV3_CONCEPTID V2_CONCEPTID
70021 R0062 R0062
130491 X76Df 2....
130498 X76Di 2....
174476 XM0yv R006.

In [ ]: